import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import ipywidgets as widgets
from ipywidgets import interact
# Plotly plotting support
import plotly.offline as py
py.init_notebook_mode()
import plotly.graph_objs as go
import plotly.figure_factory as ff
import cufflinks as cf
cf.set_config_file(offline=True, world_readable=True, theme='ggplot')
# this file joins the MCO shipping sorter scanner message with the packorder information and UPC
# our goal is to figure out for each UPC, what is the typical boxsize people use
# load the mco shipping sorter scanner message and the packorder info
mco_scan = pd.read_csv('MCO_Scan.csv')
wms_packorder = pd.read_csv('wms_packorderid.csv')
# load stockitem
stock_item = pd.read_csv('wms_stockitem.csv')
# change the name
mco_scan = mco_scan.rename(index=str, columns={"Shipping Unit Barcode": "SHIPPINGLUBARCODE"})
# convert to floats
mco_scan['SHIPPINGLUBARCODE'] = pd.to_numeric(mco_scan['SHIPPINGLUBARCODE'], errors='coerce')
wms_packorder['SHIPPINGLUBARCODE'] = pd.to_numeric(wms_packorder['SHIPPINGLUBARCODE'], errors='coerce')
# merge the two table together on shippinglubarcode
packorder_boxsize = pd.merge(mco_scan, wms_packorder, on='SHIPPINGLUBARCODE', how='inner')
# delete the unnecessary cols
del packorder_boxsize['Scanner Id']
del packorder_boxsize['Disposition Code']
del packorder_boxsize['ID']
del packorder_boxsize['CREATED']
# remove the duplicates from wms_stockitem
stock_item = stock_item.drop_duplicates(subset=['PACKORDERID'])
# join the two table
upc_boxsize = pd.merge(packorder_boxsize, stock_item, on='PACKORDERID', how='inner')
# delete cols
del upc_boxsize['Unnamed: 0']
del upc_boxsize['ID']
del upc_boxsize['CREATED']
# change the name
upc_boxsize = upc_boxsize.rename(index=str, columns={"MATERIALUPC": "UPC"})
from scipy import stats # will use this library to help find the mode
# group by upc
group_upc = upc_boxsize.groupby('UPC')
# define the cols
upc = []
size = []
# find the upc and size of pack
for name,group in group_upc:
# put all the upc's in the list
upc.append(name)
# get all the packaging barcode as a list
boxsize = group['Packaging Barcode'].tolist()
# find the mode
m = stats.mode(boxsize)
size.append(m[0][0])
# create a dataframe with upc and boxsize
find_boxsize = {'UPC': upc, 'Packaging Barcode': size}
find_boxsize_by_upc = pd.DataFrame(data=find_boxsize)
#find_boxsize_by_upc.to_csv('find_boxsize_by_upc.csv')
/Users/david.liu/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False. /Users/david.liu/anaconda3/lib/python3.6/site-packages/pandas/core/reshape/merge.py:962: UserWarning: You are merging on int and float columns where the float values are not equal to their int representation
| UPC | Packaging Barcode | |
|---|---|---|
| 0 | 56100008 | 9006004000 |
| 1 | 77295000 | 1108005250 |
| 2 | 77295037 | 2301501000 |
| 3 | 77400006 | 9006004000 |
| 4 | 77400022 | 9006004000 |
| 5 | 77400074 | 9006004000 |
| 6 | 77400580 | 9006004000 |
| 7 | 77421302 | 9006004000 |
| 8 | 77431653 | 9006004000 |
| 9 | 77431777 | 1108005250 |
| 10 | 77431779 | 9006004000 |
| 11 | 77460187 | 9006004000 |
| 12 | 83460030 | -- |
| 13 | 94304831 | 9006004000 |
| 14 | 94307810 | 9006004000 |
| 15 | 94670150 | 1309006250 |
| 16 | 94670157 | 1309006250 |
| 17 | 99915000 | 1501201100 |
| 18 | 99918510 | 2301901600 |
| 19 | 810000413 | 9006004000 |
| 20 | 810000701 | 9006004000 |
| 21 | 810000725 | 9006004000 |
| 22 | 810000795 | 9006004000 |
| 23 | 810000927 | 9006004000 |
| 24 | 834674050 | 1108005250 |
| 25 | 834674053 | 1108005250 |
| 26 | 834678005 | 1108005250 |
| 27 | 834678023 | 1108005250 |
| 28 | 852142827 | 2201601220 |
| 29 | 910038130 | 9006004000 |
| ... | ... | ... |
| 8786 | 849101053955 | 1108005250 |
| 8787 | 849101053986 | 1701305000 |
| 8788 | 852661007283 | 9006004000 |
| 8789 | 852661007290 | 9006004000 |
| 8790 | 853084004408 | 2301901600 |
| 8791 | 853881004090 | 9006004000 |
| 8792 | 853881004625 | 1309006250 |
| 8793 | 858639005381 | 9006004000 |
| 8794 | 871180830503 | 1108005250 |
| 8795 | 871180880852 | 1701305000 |
| 8796 | 871896985396 | 1701305000 |
| 8797 | 880954550003 | 9006004000 |
| 8798 | 889526077419 | 1501201100 |
| 8799 | 890297100403 | 1401007500 |
| 8800 | 890306200824 | 1401007500 |
| 8801 | 890306201139 | 1309006250 |
| 8802 | 890324300855 | 2601905250 |
| 8803 | 890324301603 | 1401007500 |
| 8804 | 890324301612 | 1701305000 |
| 8805 | 890324302023 | 1108005250 |
| 8806 | 890324302606 | 1108005250 |
| 8807 | 890324302689 | 1108005250 |
| 8808 | 890324302731 | 1108005250 |
| 8809 | 890605682067 | 1401007500 |
| 8810 | 893610647074 | 1701305000 |
| 8811 | 931274230605 | 1108005250 |
| 8812 | 932110401146 | 1108005250 |
| 8813 | 932110481170 | 1108005250 |
| 8814 | 932110486070 | 1108005250 |
| 8815 | 978073763139 | 1401007500 |
8816 rows × 2 columns